Teste
```r
df <- readxl::read_excel('./mobile_app_user_dataset_1.xlsx')[-1,]
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
### Exploração
> Uma vez que é uma pesquisa sobre *mobile devices*, veremos qual a proporção de pessoas que de fato possuem um *device*.
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuYGBgclxuXG5kZiAlPiUgXG4gIHNlbGVjdChRMikgJT4lIFxuICBkcm9wX25hKCkgJT4lIFxuICBtdXRhdGUoXG4gICAgUTIgPSBjYXNlX3doZW4oXG4gICAgICBRMiA9PSAxIH4gXFxQb3NzdWlcXCxcbiAgICAgIFEyICE9IDEgfiBcXE7Dg8KjbyBQb3NzdWlcXFxuICAgIClcbiAgKSAlPiUgXG4gIGdncGxvdChcbiAgICBhZXMoXG4gICAgICB5ID0gUTIsXG4gICAgICBmaWxsID0gUTJcbiAgICApXG4gICkgK1xuICBnZW9tX2JhcihcbiAgICBwb3NpdGlvbiA9IFxcZG9kZ2VcXCxcbiAgKSArXG4gIGdlb21fbGFiZWwoXG4gICAgICBzdGF0ID0gJ2NvdW50JyxcbiAgICAgIGFlcyhcbiAgICAgICAgbGFiZWwgPSAuLmNvdW50Li4sXG4gICAgICApLFxuICAgICAgY29sb3IgPSAnd2hpdGUnLFxuICAgICAgc2hvdy5sZWdlbmQgPSBGQUxTRVxuICApICsgXG4gIGxhYnMoXG4gICAgeSA9IFxcXFwsXG4gICAgeCA9IFxcVG90YWwgZGUgUGVzc29hc1xcLFxuICAgIHRpdGxlID0gXFxEaXN0cmlidWnDg8Knw4PCo28gZGUgcGVzc29hcyBxdWUgdGVtIG91IG7Dg8KjbyBjZWx1bGFyXFwsXG4gICAgc3VidGl0bGUgPSBcXERhZG9zIG9yaXVuZG9zIGRhIHBlc3F1aXNhIHJlYWxpemFkYSBwZWxhIEhhcnZhcmRcXFxuICApICtcbiAgc2NhbGVfZmlsbF9tYW51YWwoXG4gICAgdmFsdWVzID0gYyggXFwjQzQxNjFDXFwsIFxcIzAwOTQ5MVxcKSxcbiAgKSArXG4gIHRoZW1lX2NsYXNzaWMoKSArXG4gIHRoZW1lKFxuICAgIGF4aXMudGl0bGUueCA9IGVsZW1lbnRfdGV4dCh2anVzdD0tLjIsIHNpemU9MTEpLFxuICAgIGxlZ2VuZC50aXRsZSA9IGVsZW1lbnRfYmxhbmsoKVxuICApIFxuXG5gYGBcbmBgYCJ9 -->
```r
```r
df %>%
select(Q2) %>%
drop_na() %>%
mutate(
Q2 = case_when(
Q2 == 1 ~ \Possui\,
Q2 != 1 ~ \Não Possui\
)
) %>%
ggplot(
aes(
y = Q2,
fill = Q2
)
) +
geom_bar(
position = \dodge\,
) +
geom_label(
stat = 'count',
aes(
label = ..count..,
),
color = 'white',
show.legend = FALSE
) +
labs(
y = \\,
x = \Total de Pessoas\,
title = \Distribuição de pessoas que tem ou não celular\,
subtitle = \Dados oriundos da pesquisa realizada pela Harvard\
) +
scale_fill_manual(
values = c( \#C4161C\, \#009491\),
) +
theme_classic() +
theme(
axis.title.x = element_text(vjust=-.2, size=11),
legend.title = element_blank()
)
Podemos visualizar que a grande parte das pessoas possuem celular.
A proporção de pessoas que não tem é de 12%.
Visualizaremos agora os diferentes tipos de dispositivos utilizados pelos usuários que possuem celular
phone_format <- function (phone_list, phone_type, apply_function = function(param) param) {
phone_dic = list(
apple = c(\apple\, \iphone\, \ipad\, \aple\, \appale\, \ipod\, \aplle\, \i-phone\, \ipone\, \applke\, \applr\, \appme\, \iphon\),
blackberry = c(\blackberry\, \blackb\, \blackeb\, \baclkberry\, \blakckberry\, \blacberry\, \blakberry\, \blackerry\, \bleckberry\),
samsung = c(\samsung\, \samsumg\, \sansung\, \sumsung\, \samsug\, \samsun\, \samgung\, \samsing\, \samung\, \sansug\, \samasung\, \samsang\, \samsong\, \sumsang\, \galaxynote\),
null = c(\\\?\, \9000\, \930p\),
sony_ericsson = c(\sony-\, \sonyer\, \sony\, \erison\),
nokia = c(\nokia\, \nokya\),
asus = c(\asus\),
acer = c(\acer\)
)
str_detect(phone_list, paste(phone_dic[[phone_type]], collapse = \|\)) ~ apply_function(phone_type)
}
phones <- df %>%
filter(Q2 == 1) %>%
select(Q3_1_TEXT) %>%
drop_na() %>%
mutate(
Q3 = str_replace(str_to_lower(Q3_1_TEXT), \ \, \\)
) %>%
mutate(
Q3 = case_when(
phone_format(Q3, \apple\),
phone_format(Q3, \samsung\),
phone_format(Q3, \blackberry\),
phone_format(Q3, \null\),
phone_format(Q3, \sony_ericsson\, function(param) str_replace(param, \_\, \ \)),
TRUE ~ Q3
)
) %>%
group_by(Q3) %>%
count() %>%
arrange(desc(n))
phones
length(names(df))
[1] 161
df
q5_answers <- data.frame(
row.names = c(1, 2, 3, 4, 5, 6, 7, 8, 9),
val = c(\Never\, \Less than once a month \, \Once a month\, \More than once a month\, \Once a week\, \More than once a week\, \Once a day\, \Several times a day\, \Other\)
)
df %>%
select(Q5) %>%
drop_na() %>%
mutate(
Q5_TEXT = qr_answers[Q5, ]
) %>%
group_by(Q5_TEXT) %>%
count() %>%
arrange(desc(n)) %>%
ggplot(
aes(
x = reorder(Q5_TEXT, -n),
y = n,
fill = Q5_TEXT
)
) +
geom_col() +
labs(
x = 'Frequencia de abertura da loja de aplicativos',
y = 'Total'
) +
theme(
axis.text.x = element_text(angle = 45, vjust=.6),
)
NA
q6_answers <- data.frame(
row.names = c(1, 2, 3, 4, 5, 6),
val = c("0 - 1", "2 - 5", "6 - 10", "11 - 20", "21 - 30", "Mais de 30")
)
df %>%
select(Q6) %>%
drop_na() %>%
mutate(
Q6_TEXT = q6_answers[Q6,]
) %>%
group_by(Q6_TEXT) %>%
count() %>%
ggplot(
aes(
x = reorder(Q6_TEXT, -n),
y = n,
fill = Q6_TEXT
)
) +
geom_col() +
labs(
x = 'Quantidade de aplicativos baixados por mês',
y = 'Total',
) +
theme_classic() +
labs(
fill = 'Frequência'
) +
theme(
axis.text.x = element_text(vjust = -1),
axis.title.x = element_text(vjust = -1),
)
Vamos ver se a galera que mais baixa é a galera que mais acessa a loja
df %>%
select(Q5, Q6) %>%
drop_na() %>%
mutate(
Q5 = q5_answers[Q5,],
Q6 = q6_answers[Q6, ]
) %>%
group_by(Q5, Q6) %>%
count() %>%
arrange(desc(n)) %>%
ggplot(
aes(
y = reorder(Q5, -n),
x = n,
fill = Q6
)
) +
geom_bar(
stat = "identity",
position = position_dodge(width = 1)
) +
geom_label(
aes(
label = n,
),
size = 3
) +
labs(
y = 'Frequência de acesso à loja de aplicativos',
x = 'Total'
) +
theme(
axis.text.x = element_text(angle = 20, vjust = 0.5),
) +
facet_grid(rows = 'Q6')
arrange_and_plot <- function(df,
cols,
named_cols,
desc_col = \reason\,
legend.position = \none\,
title = \\,
xlabel = \\,
ylabel = \\,
show.legend = FALSE,
col.width = 0.5,
dodge.width = 0.5,
xaxis.title.size = 13,
xaxis.title.vjust = 0.5,
yaxis.title.size = 13,
yaxis.title.vjust = 0,
xaxis.text.angle = 0,
xaxis.text.vjust = 0,
invert.axis = FALSE,
hide.yaxis.title = FALSE,
hide.xaxis.title = FALSE
) {
rdf <- df %>%
select(cols) %>%
rowwise() %>%
sapply(as.numeric) %>%
as.tibble() %>%
rowwise() %>%
replace(is.na(.), 0) %>%
rowwise() %>%
sapply(sum, simplify = FALSE) %>%
as.tibble()
names(rdf) <- named_cols
plot <- rdf %>%
gather(
desc_col, \total\, 1:ncol(.)
) %>%
ggplot(
aes(
x = if (invert.axis) total else reorder(desc_col, -total),
y = if (invert.axis) reorder(desc_col, total) else total,
fill = desc_col
)
) +
geom_col(
width = col.width,
position = position_dodge(dodge.width)
)
if (invert.axis) {
plot <- plot +
geom_label(
aes(
x = total,
label = total,
),
show.legend = show.legend
)
} else {
plot <- plot +
geom_label(
aes(
y = total,
label = total,
),
show.legend = show.legend
)
}
plot +
labs(
title = title,
x = xlabel,
y = ylabel
) +
theme_classic() +
theme(
legend.position = legend.position,
axis.title.x = if (!hide.xaxis.title) element_text(size=xaxis.title.size, vjust=xaxis.title.vjust) else element_blank(),
axis.title.y = if (!hide.yaxis.title) element_text(size=yaxis.title.size, vjust=yaxis.title.vjust) else element_blank(),
axis.text.x = element_text(angle = xaxis.text.angle, vjust=xaxis.text.vjust)
)
}
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
join_columns_and_plot <- function(dfp, cols) {
dfp %>%
select(cols) %>%
rowwise() %>%
sapply(as.numeric) %>%
as.tibble() %>%
rowwise() %>%
replace(is.na(.), 0) %>%
rowwise() %>%
sapply(sum, simplify = FALSE) %>%
as.tibble()
}
q7_names <-c("Feeling Depressed", "Need to carry out a task", "Feeling bored", "Want to be entertained", "Need to know something", "Other")
q7_cols <- names(df)[(21:26)]
df %>%
arrange_and_plot(
q7_cols,
q7_names,
title = 'Fatores motivadores que levam as pessoas a baixarem apps',
xlabel = 'Motivo',
ylabel = 'Total',
dodge.width = 0.5,
col.width = 0.6,
xaxis.title.vjust = -0.5
)
Warning: `as.tibble()` was deprecated in tibble 2.0.0.
Please use `as_tibble()` instead.
The signature and semantics have changed, see `?as_tibble`.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Note: Using an external vector in selections is ambiguous.
ℹ Use `all_of(cols)` instead of `cols` to silence this message.
ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This message is displayed once per session.
q8_cols = names(df)[(27:35)]
q8_names <- c(
"Compare several to choose one",
"Download the first presented",
"Featured apps",
"Among top downloads",
"Randomly choose one that might interest",
"Search using keywords",
"Visit websites that review apps",
"Use search engines",
"Other"
)
df %>%
arrange_and_plot(
q8_cols,
q8_names,
title = 'Fatores motivadores que levam as pessoas a baixarem apps',
xlabel = 'Total',
ylabel = 'Motivo',
dodge.width = 0.5,
col.width = 0.6,
invert.axis = TRUE
)
NA
q9_cols <- names(df)[(36:48)]
q9_names <- c(
"Reviews by other users",
"Name of app (e.g., catchy name)",
"Number of users who have downloaded the app",
"Icon (e.g., if the icon is attractive)",
"Description of the app",
"Features",
"Number of users who have rated the app",
"Price",
"Star rating",
"Size of app",
"Screen shots (e.g., see how it looks running)",
"Who developed the app",
"Other"
)
df %>%
arrange_and_plot(
q9_cols,
q9_names,
"Motivo",
title = "Motivos para escolher o aplicativo para baixar",
xlabel = "Total",
ylabel = "Porque baixa",
col.width = 0.7,
invert.axis = TRUE,
hide.yaxis.title = TRUE,
hide.xaxis.title = TRUE
)
q10_cols <- names(df)[(49:(49+14))]
q10_names <- c(
"To interact with friends and/or family.",
"To interact with people I don't know.",
"To help me carry out a task.",
"It is featured in the app store.",
"It is on the top downloads chart.",
"It is advertised in the apps that I am using. ",
"For entertainment.",
"Out of curiosity. ",
"An impulsive purchase.",
"It features brands or celebrities that\nI like\n(e.g., Coca-Cola, Michael Jackson). ",
"It was mentioned in the media\n(e.g., TV, newspaper, radio, blogs). ",
"It is an extension of the \nwebsite that I use (e.g., Facebook app). ",
"It is recommended by friends and/or family. ",
"For someone else (e.g., children, partner).",
"Other"
)
df %>%
arrange_and_plot(
q10_cols,
q10_names,
"Motivo",
title = "Motivos que levam ao download do app",
xlabel = "Total",
ylabel = "",
col.width = 0.6,
dodge.width = 1,
invert.axis = TRUE,
hide.yaxis.title = TRUE,
hide.xaxis.title = TRUE
)
q9_cols <- names(df)[(64:(63+12))]
df %>%
arrange_and_plot(
q9_cols,
q9_cols,
invert.axis = TRUE
)
countries = c(
"American",
"Australian",
"Brazilian",
"British",
"Canadian",
"Chinese",
"French",
"German",
"Indian",
"Italian",
"Japanese",
"Mexican",
"Russian",
"South Korean",
"Spanish",
"Other",
"Not assigned - NA"
)
country_df <- df %>%
group_by(Q19) %>%
count() %>%
dplyr::mutate(
Q19 = as.numeric(Q19)
) %>%
dplyr::arrange(Q19) %>%
mutate(
Q19 = as.character(Q19)
)
country_df$Q19 <- countries
country_df %>%
ggplot(
aes(
y = reorder(Q19, n),
x = n,
fill = Q19
)
) +
geom_bar(
stat = "identity",
position = position_dodge(10)
) +
labs(
x = 'Total de pessoas',
y = 'País',
fill = ''
) +
theme(
legend.position = "none",
axis.text.x = element_text(vjust=0.5, angle = 15)
)
df %>%
select(names(df)[c(76:79)]) %>%
drop_na
logit_user_feats <- c("Q5", "Q6", "Q16", "Q17", "Q18", "Q19", "Q23", "Q24", "Q26", "Q27")
df_logit <- df %>%
pivot_longer(starts_with("Q11_"), names_to="Potential", values_to="Potential_value") %>%
mutate(
Potential = as.factor(as.numeric(str_extract(Potential, "([0-9]+)$"))),
Q17 = as.integer(Q17)
) %>%
drop_na("Potential_value") %>%
select(
logit_user_feats, starts_with("Q7_"), starts_with("Q8_"),
starts_with("Q9_"), starts_with("Q10_"), starts_with("Q11_"),
starts_with("Q13_"), starts_with("Q14_"), starts_with("Q15_"),
Potential
) %>%
mutate_all(
funs(as.numeric(.))
) %>%
mutate_all(
funs(replace_na(., 0))
) %>%
mutate(
Potential = as.factor(Potential)
)
df_logit <- df %>%
pivot_longer(starts_with("Q11_"), names_to="Potential", values_to="Potential_value") %>%
mutate(
Potential = as.factor(as.numeric(str_extract(Potential, "([0-9]+)$"))),
Q17 = as.integer(Q17)
) %>%
drop_na("Potential_value") %>%
select(
logit_user_feats, starts_with("Q7_"), starts_with("Q8_"),
starts_with("Q9_"), starts_with("Q10_"), starts_with("Q11_"),
starts_with("Q13_"), starts_with("Q14_"), starts_with("Q15_"),
Potential
) %>%
mutate_all(
funs(as.numeric(.))
) %>%
mutate_all(
funs(replace_na(., 0))
) %>%
mutate(
Potential = as.factor(Potential)
)
df %>%
drop_na(Q17) %>%
group_by(Q17) %>%
count() %>%
arrange(desc(n))
df_logit %>%
select(Potential) %>%
group_by(Potential) %>%
count() %>%
arrange(n)
NA
logit_fit
Call:
nnet::multinom(formula = Potential ~ ., data = train, family = "binomial",
MaxNWts = 10000)
Coefficients:
(Intercept) Q5 Q6 Q16 Q17 Q18 Q19
2 -2.831077 0.19314421 0.004161363 -0.3357207 -0.009387795 0.26225274 -0.007136793
3 -2.516666 0.14964958 0.062203240 -0.4114570 0.004371494 0.06412545 -0.043747198
4 -1.643322 0.02473883 0.038363577 -0.3069615 0.002536063 0.13703026 -0.017205541
5 -2.277490 0.13373126 0.050118183 -0.2801466 -0.005069066 0.15275730 0.003345424
6 -1.603665 0.05867205 -0.027128103 -0.2174974 -0.011237502 0.05232554 -0.024047783
7 -1.995366 0.12635273 0.064530462 -0.1977068 -0.008755982 0.21198390 -0.020068860
8 -2.841578 0.09212726 0.084028442 -0.5093149 -0.007700749 0.22589499 -0.038007138
9 -2.676805 0.14611839 0.083300078 -0.2886305 -0.008733964 0.18167614 -0.028369655
10 -2.139975 -0.05098983 -0.426191281 -0.1625053 0.010872033 0.02276676 -0.035750468
11 -2.506288 0.21970822 0.020871833 -0.4863416 -0.014578997 0.26021978 -0.008588354
Q23 Q24 Q26 Q27 Q7_1 Q7_2 Q7_3
2 -0.11475293 -0.011932040 -0.017762590 0.001224567 -0.3273897 -0.006565511 -0.12778748
3 -0.07388938 0.029846580 -0.070107874 -0.018509979 -0.1886862 -0.069507829 0.03635108
4 0.07250743 -0.023823120 -0.056606214 -0.014272725 -0.2970201 0.033226531 -0.24981206
5 0.05211095 -0.018508863 -0.004296008 -0.015381987 -0.2153086 0.005961373 -0.25281799
6 0.10221845 -0.015402683 -0.018769485 -0.021353499 -0.2226241 0.099047243 -0.02343900
7 -0.00587136 -0.003947630 -0.082206616 -0.021578089 -0.5231015 0.078552878 -0.17132130
8 0.21164926 -0.016366188 -0.068373565 -0.016606348 0.1757757 -0.142145512 -0.26783935
9 0.12220663 -0.027326144 -0.061018920 -0.015603917 -0.4257900 0.278902985 -0.20898424
10 0.01902384 -0.009246391 -0.012659432 -0.024073792 -1.4169030 -0.576445905 -0.62210996
11 0.11007697 -0.015533882 -0.075624455 0.004333665 -0.1266129 -0.062915609 -0.12461105
Q7_4 Q7_5 Q7_6 Q8_1 Q8_2 Q8_3 Q8_4
2 0.07402743 -0.04926993 0.42389213 0.4546129 0.054833705 0.25633159 0.18109714
3 -0.02432760 -0.09508477 0.41086061 0.2739049 -0.018247110 0.19367040 0.18530447
4 0.10814208 0.04369644 0.38750228 0.1739729 0.235013824 0.39712599 -0.03026940
5 0.16844707 0.15029793 -0.22545247 0.3261790 0.084001712 0.18183069 0.08173635
6 -0.05931305 0.08812191 0.17429044 0.2996749 -0.190020827 0.12068714 0.17854717
7 0.20030625 -0.04474142 0.18000242 0.2624625 -0.077350666 0.21700502 0.14269081
8 -0.53531851 -0.07689812 -0.91673528 0.4044391 0.237044845 0.09141466 0.23006722
9 0.01582758 0.14088673 0.04411266 0.3744076 -0.178807358 0.41666936 0.14154636
10 -0.31341224 -0.13515732 0.37093932 0.4664463 0.691994620 -0.19002242 -0.06748627
11 0.30474982 0.18214132 -0.80370520 0.2061919 -0.136897988 0.16972730 0.14954853
Q8_5 Q8_7 Q8_8 Q8_9 Q8_10 Q9_1 Q9_2
2 -0.0927440011 0.22843554 0.6188075 0.28729806 -0.09479455 -0.17562083 -0.06637064
3 0.1180314934 0.20553981 0.3035256 0.26245845 0.20784151 0.04127033 -0.13585906
4 -0.1508795522 0.04273248 0.4728787 0.12342009 -0.55997498 -0.27201047 0.01691151
5 -0.0733877951 0.16236867 0.2946529 0.19563806 0.03857156 0.07773995 -0.14257689
6 -0.1803180913 0.10406247 0.2311458 0.05093760 -0.30938381 0.11858252 -0.17357420
7 -0.0005617848 0.06975359 0.4759930 0.22199743 -0.01609591 -0.05934720 -0.27227179
8 0.1126394863 0.16660657 0.8086141 0.46023488 -0.21607017 -0.19009589 -0.26620072
9 -0.2582946078 0.04400909 0.5689429 -0.01205932 -0.38115538 -0.08692433 -0.18962640
10 0.2978943298 0.22562167 0.1769604 0.42010753 0.92040894 -0.18067628 0.28628392
11 -0.1039243429 -0.03672964 0.5048358 -0.03904161 -0.78991114 -0.02439985 -0.20106557
Q9_3 Q9_4 Q9_5 Q9_6 Q9_7 Q9_8 Q9_9
2 0.010794258 0.15501643 -0.01258929 0.01750330 -0.083674849 0.66093929 0.01879016
3 -0.005616802 -0.01413053 0.08863840 0.01071956 -0.081620317 0.92029361 -0.09614364
4 0.250670230 -0.10329657 0.02686441 -0.07008140 -0.050111043 0.16261538 -0.08588886
5 0.158764070 0.26121889 0.15058093 0.04660817 -0.058973640 0.30142604 0.00578427
6 0.072381441 -0.03027443 0.28362386 0.18080872 -0.125079342 0.59189995 -0.12307577
7 0.038574579 0.18874711 0.14475552 0.38167847 -0.018639986 0.26782217 0.09370258
8 -0.189706186 0.29495112 -0.02754503 0.13553867 0.109301344 0.35364094 -0.09946875
9 0.195708876 0.11186023 0.08867708 0.08432934 -0.007565676 0.25988088 -0.05095026
10 -0.341531332 -0.35313099 0.20057838 -0.08483723 0.068390101 0.08239902 -0.68873167
11 0.161127118 0.16666630 0.12963231 0.05253685 0.016356211 0.45419501 -0.18510437
Q9_10 Q9_11 Q9_12 Q9_13 Q10_1 Q10_2 Q10_3
2 0.06941249 0.2838817087 0.4076446 -1.6125788 0.18849654 0.38378006 0.09888458
3 -0.01264827 0.1636982459 0.3256305 -2.9167772 0.10182969 0.02727020 0.20251712
4 0.15283003 -0.0510164725 0.2642467 -0.7174872 0.12072349 0.45336666 0.22499325
5 0.07090717 0.1030745363 0.1253765 0.2689275 0.16636850 0.19256866 0.33599264
6 -0.01924235 0.0880114819 0.2902647 -1.3245600 0.14196463 0.13784582 0.41006395
7 -0.01305607 0.1015923307 0.2415514 -1.0859023 -0.02410912 0.07253304 0.21761326
8 -0.08755311 0.0004197098 0.5042589 -2.4168312 0.33684650 0.26144058 -0.01639989
9 0.08039864 0.1327298874 0.2256257 0.2663563 0.04713361 0.33730367 0.22052983
10 -0.67898960 0.3334148956 -1.0354792 0.6090934 0.22271883 -0.74929341 0.17924710
11 0.06684000 -0.0216071559 0.5183782 -2.0375151 0.14507755 0.43563286 0.24543953
Q10_4 Q10_5 Q10_6 Q10_7 Q10_8 Q10_9 Q10_10 Q10_11
2 -0.01824869 0.10215193 0.162355558 -0.33870546 -0.2435150 0.9041642 0.03481875 0.17984589
3 0.24460641 -0.03625905 0.080329952 0.16365242 -0.1236544 0.9157578 0.04951013 0.24666361
4 0.15931557 -0.10728450 0.308677295 0.05042364 -0.3388083 0.6370137 0.37231696 0.29242588
5 0.10859082 0.06250834 0.193782955 -0.14071722 -0.2186133 0.8479041 0.04337904 0.09852663
6 0.26849668 -0.05349044 0.170773541 -0.01561051 -0.1483034 0.8704540 -0.04689152 0.12906971
7 0.20008255 -0.06543156 0.263411862 -0.16118019 -0.1445994 1.0195425 0.34318033 0.18168666
8 0.24475656 0.11643685 -0.003948784 -0.14975360 -0.1224306 0.8263344 0.44298220 0.20467052
9 0.39733921 0.10852853 0.272558620 0.05258584 -0.2994447 1.0068521 0.13137397 0.04130151
10 0.31338940 -0.76191371 -0.819047089 0.02146000 0.4696538 1.1814127 -0.39299128 0.57138472
11 0.14090285 0.05401613 0.157245222 -0.01455343 -0.2273161 0.8907470 0.19554274 0.20504680
Q10_12 Q10_13 Q10_14 Q10_15 Q13_1 Q13_2 Q13_3
2 0.22650233 0.01253063 0.10675680 -0.69091773 -0.8067342 -0.63669450 0.057701261
3 0.02549043 0.15536189 0.04250272 0.51432525 -0.8924092 -0.26751989 0.062381803
4 0.12342114 -0.03259669 0.10735930 -0.82617839 -1.1075524 -0.23291396 0.277940534
5 0.17346289 0.14596063 0.12767655 -0.74173957 -0.7299857 0.02392404 0.120803486
6 0.17728992 -0.05290743 0.25601113 -1.56052923 -0.3578338 -0.12878161 0.285393923
7 0.13232849 0.09257728 -0.10785407 -2.08681825 -0.6314696 -0.02376094 0.101484270
8 0.33601827 0.47444825 -0.13937788 -2.73883541 -0.9520312 0.04001074 0.242747669
9 0.11934481 0.19800758 0.06571727 -0.05160603 -0.7877894 -0.09882180 0.006756926
10 -0.37719835 -0.12026632 -1.26232357 3.09416727 -0.9221408 -0.75894517 -1.246875591
11 -0.03779334 0.16771581 -0.12082981 0.12441657 -0.7970070 0.03673945 0.206048689
Q13_4 Q13_5 Q13_6 Q13_7 Q14_1 Q14_2 Q14_3
2 0.02447495 0.41085965 0.01236241 0.216524234 -0.42339013 0.16687301 0.64047720
3 -0.09410995 -0.01490311 0.10327466 -0.405644290 -0.21720073 0.01196077 0.26645161
4 -0.21049144 0.05630345 0.19512296 -0.251444538 -0.18012077 0.20326034 0.43899264
5 0.05424756 0.13820074 0.22268393 0.123192510 -0.12463029 0.12346176 0.13101793
6 -0.01863604 0.02155430 0.08190169 0.563211705 -0.04024215 0.17348826 0.22944540
7 -0.19918635 -0.08157111 0.04227276 -0.365884642 -0.26828743 0.22196887 0.32539145
8 -0.28864745 0.22403966 -0.17446306 0.178967532 -0.45251514 0.33066588 0.23462200
9 -0.10832183 0.07317816 0.09625005 1.911494063 -0.19144835 0.20568804 0.21634041
10 -0.12054782 0.12827956 -0.72118470 0.898523803 -0.56958297 0.89333305 -0.21785836
11 -0.12203964 0.09006985 -0.11722242 0.005613312 -0.16816271 0.09377080 0.02509614
Q14_4 Q14_5 Q14_6 Q14_7 Q14_8 Q14_9 Q14_10
2 0.07140485 -0.26567013 -0.25129490 0.30203243 -0.42751502 0.045219330 -0.011489388
3 -0.01518089 0.14005862 0.08127085 0.34339798 -0.47022875 -0.035324252 -0.002281833
4 0.02193581 0.09384575 0.13019480 0.02192473 -0.46624910 0.210361040 0.027846321
5 -0.04802637 -0.26022717 0.18958431 0.06085254 -0.19555087 -0.032012518 -0.014500094
6 -0.02086528 -0.38317048 0.07713133 0.09796064 -0.31268075 0.222255862 0.059880137
7 -0.10508543 0.01409683 -0.14651419 0.05481870 -0.35642590 0.022960793 -0.089668210
8 -0.08840425 0.11196218 -0.14691357 0.13599169 -0.50426472 0.276170256 -0.085434843
9 0.09277991 -0.13973942 -0.26492603 0.05665647 -0.23859828 0.249661073 -0.055284070
10 -0.06952550 0.13085555 -0.03925273 0.47919262 0.02120778 -0.005914517 0.020280392
11 0.17193083 0.02754811 -0.16745450 0.01038164 -0.33134883 -0.161581020 -0.098744432
Q14_11 Q14_12 Q14_13 Q14_15 Q14_14 Q15_1 Q15_2
2 -0.229503180 0.262958528 -0.088168418 -0.18357401 0.36375534 0.05810586 -0.09733213
3 -0.001239108 -0.075826309 -0.112957114 -0.37397575 -1.31930510 0.14091441 -0.03081196
4 -0.315487224 -0.007346129 0.222386225 -0.28383966 -1.21942287 0.05122575 -0.06325836
5 0.066230837 -0.059986489 0.005158116 -0.42832907 -0.42928068 0.13615237 0.22059837
6 -0.095986255 -0.180277655 0.170708408 -0.17453253 0.08275291 0.13943228 0.04579892
7 0.076512557 0.052639798 -0.006889711 -0.34412733 0.13282396 -0.11720144 0.14338829
8 -0.469625879 -0.041115423 -0.233210269 -0.16128050 -0.25252432 -0.21576047 0.27073734
9 -0.195739508 -0.006969107 0.002077257 -0.30650357 0.01097325 -0.06273817 0.19679786
10 0.116274467 0.349799506 -0.024606874 -0.38092847 0.29626501 0.21029537 0.18387818
11 -0.177963413 -0.284788502 -0.045453081 -0.09792457 -0.50663398 0.20842377 0.03821965
Q15_3 Q15_4 Q15_5 Q15_6 Q15_7 Q15_8 Q15_9 Q15_10
2 -0.40488343 0.34428650 0.4891544 -0.14251733 0.38386558 -0.18224086 -0.11912023 -0.09996463
3 -0.13608256 0.08706679 0.1061022 0.17730599 0.05108660 -0.10473172 -0.09425231 0.09431644
4 0.14950542 0.33989602 0.1830148 0.10328183 0.03640673 0.10349762 -0.08827289 0.26166627
5 -0.42961260 0.18572163 0.2393143 -0.08936696 -0.11378233 0.01055479 -0.03109619 -0.02824228
6 -0.08582446 0.08831954 0.3321812 -0.04410963 0.06720909 -0.08996181 -0.21562052 0.07080580
7 -0.20082430 0.18746675 0.3303431 0.10924398 -0.02852805 -0.14390277 0.06653401 0.18871334
8 -0.12530735 0.70345397 0.3166134 0.13840664 0.02992726 -0.26204245 0.23936382 0.07050664
9 -0.40551026 0.27086580 0.1275485 0.05598596 -0.02439782 -0.14804981 -0.09801881 0.11010110
10 -0.57682101 0.09470300 0.1651234 0.39384591 0.23589631 -0.71144656 -0.25760026 -0.32517041
11 -0.07959043 0.32230413 0.1483324 -0.13773477 0.04503476 0.12662749 0.32575176 0.03236908
Q15_11 Q15_12 Q15_13 Q15_14 Q15_15 Q15_16 Q15_17
2 0.16032766 0.03665671 -1.403467e-02 -0.3886217 0.01779135 -0.21293620 0.22096561
3 0.26226532 0.16004692 4.121008e-02 -0.4067031 0.01711580 0.06409822 0.04549150
4 0.38574035 -0.28937310 -2.393017e-02 -0.2601586 -0.21436901 -0.22503506 0.23383482
5 0.18580903 0.03083824 9.703141e-02 -0.4467016 0.10191144 -0.16475444 0.09935191
6 0.05875572 -0.05034018 -8.837030e-02 -0.3885621 -0.18442755 -0.12654367 0.34786799
7 0.22865406 -0.03515164 4.344807e-02 -0.2866016 -0.13753391 -0.07210316 0.10080014
8 0.27140979 0.10749261 -1.175003e-01 -0.2946104 -0.11991293 -0.28494410 -0.11678627
9 0.29777150 -0.10590795 3.983593e-05 -0.2911798 0.06847877 0.11152112 0.17092460
10 -0.75558885 -0.03411834 5.158809e-01 -0.5950110 0.23862376 -0.12471037 0.68524004
11 0.42323762 -0.24392313 -9.101441e-02 -0.4035014 0.12010785 -0.08984163 0.10878433
Q15_18 Q15_19 Q15_20 Q15_21 Q15_22 Q15_23
2 -0.31330692 0.01240573 -0.003992033 0.45381678 0.013077043 -1.9702690
3 -0.27774286 -0.11811972 -0.010654094 0.07298789 0.019539959 -0.1057625
4 -0.11021496 -0.22199759 -0.143727902 -0.02325880 0.094172456 -0.5613058
5 -0.26980062 -0.05021865 -0.076960114 -0.18655709 -0.062958614 0.1491968
6 0.13678916 -0.08966400 -0.035833686 0.14855685 0.030043329 -5.5666995
7 -0.10368830 0.04608581 0.022320927 0.06391885 -0.009857221 0.2192474
8 0.09517296 -0.15855948 0.022567302 -0.33017371 0.164728979 -0.4915665
9 0.02824367 -0.18306298 0.022958288 0.02514391 0.263908911 -1.8954204
10 0.04876036 -0.21687427 -0.042411349 0.62679404 -0.081083855 -0.7456743
11 -0.21195829 -0.29953099 -0.215043711 0.00739897 0.118867569 1.0260196
[ reached getOption("max.print") -- omitted 1 row ]
Residual Deviance: 30507.05
AIC: 32685.05
ytest <- test %>%
select(Potential)
MLmetrics::Accuracy(ypred, ytest$Potential)
[1] 0.2975238
Se fossemos chutar uma classe dentre as 12 possíveis, teríamos 1/12 em média de acertos. Isso é menor que a acurácia obtida, então no geral nosso modelo conseguiu aprender um pouco sobre os diferentes perfis.
pROC::roc(response = ytest$Potential, predictor=as.numeric(ypred)) %>%
plot
Warning in roc.default(response = ytest$Potential, predictor = as.numeric(ypred)) :
'response' has more than two levels. Consider setting 'levels' explicitly or using 'multiclass.roc' instead
Setting levels: control = 1, case = 2
Setting direction: controls < cases
df_km <- df_logit %>%
select(-Potential)
adjust_kmeans <- function(k) {
k_fit <- df_knn %>%
kmeans(k, iter.max = 30)
w <- k_fit$tot.withinss
tibble(k = k, w = w)
}
k_adjust <- map_dfr(2:10, adjust_kmeans)
k_adjust %>%
ggplot(
aes(
x = k,
y = w
)
) +
geom_line() +
geom_point(colour = "red", size = 3) +
theme_minimal(12)
Iremos separar em 5 grupos…
k_adj <- df_km %>%
kmeans(5, iter.max = 30)
length(k_adj$cluster)
[1] 10499
df_km$cluster = k_adj$cluster
df_km %>%
select(Q5, cluster) %>%
drop_na() %>%
mutate(
Q5 = q5_answers[Q5,],
cluster = as.factor(cluster)
) %>%
group_by(Q5, cluster) %>%
count() %>%
arrange(desc(n)) %>%
ggplot(
aes(
x = reorder(Q5, -n),
y = n,
fill = cluster
)
) +
geom_bar(
stat = "identity",
position = "fill"
) +
labs(
y = 'Frequência de acesso à loja de aplicativos',
x = 'Total'
) +
theme(
axis.text.x = element_text(angle = 20, vjust = 0.5),
)
Conseguimos ver que 2 grupos parecem acessar mais a loja do que os demais…
Visualizaremos a correlação:
cor(df_km$Q5, df_km$cluster)
[1] -0.2072271
Quanto maior o uso, menor o cluster… é o que conseguimos ver também em nosso gráfico.
Prosseguiremos…
df %>%
select(starts_with(col), cluster) %>%
pivot_longer(cols = starts_with(col), names_to = names_to, values_to = values_to)
Error in `select()`:
! `match` must be a character vector of non empty strings.
Backtrace:
1. df %>% select(starts_with(col), cluster) %>% ...
24. tidyselect::starts_with(col)
25. tidyselect:::check_match(match)
Os grupos parecem ter uma distribuição homogenea no que diz respeito as razões por baixas aplicativo.